import math
import os
from urllib.request import urlretrieve

import numpy as np
import pandas as pd
import torch
from PIL import Image

import hpsv2
import ImageReward as RM
import open_clip
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
from torchmetrics.multimodal import CLIPScore
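
# Decode a run-length-encoded (RLE) segmentation mask into a binary array.
# The encoding is assumed to be the usual flat list of alternating
# (1-indexed start, run length) pairs over the flattened image, e.g.
# rle2mask([1, 3, 10, 2], (4, 4)) marks flattened pixels 0-2 and 9-10.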
def rle2mask(mask_rle, shape):  # shape: (height, width)
    starts, lengths = [np.asarray(x, dtype=int) for x in (mask_rle[0:][::2], mask_rle[1:][::2])]
    starts -= 1
    ends = starts + lengths
    binary_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        binary_mask[lo:hi] = 1
    return binary_mask.reshape(shape)
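
# Wraps the models needed for the metrics reported below so they are loaded
# once and reused for every image.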
class MetricsCalculator:
    def __init__(self, device, ckpt_path="../../data/ckpt") -> None:
        self.device = device
        # clip
        self.clip_metric_calculator = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14").to(device)
        # lpips
        self.lpips_metric_calculator = LearnedPerceptualImagePatchSimilarity(net_type='squeeze').to(device)
        # aesthetic model: a linear head over CLIP ViT-L/14 image features
        self.aesthetic_model = torch.nn.Linear(768, 1)
        aesthetic_model_url = (
            "https://github.com/LAION-AI/aesthetic-predictor/blob/main/sa_0_4_vit_l_14_linear.pth?raw=true"
        )
        aesthetic_model_ckpt_path = os.path.join(ckpt_path, "sa_0_4_vit_l_14_linear.pth")
        if not os.path.exists(aesthetic_model_ckpt_path):  # download the head only once
            os.makedirs(ckpt_path, exist_ok=True)
            urlretrieve(aesthetic_model_url, aesthetic_model_ckpt_path)
        self.aesthetic_model.load_state_dict(torch.load(aesthetic_model_ckpt_path))
        self.aesthetic_model.eval()
        self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='openai')
        # image reward model
        self.imagereward_model = RM.load("ImageReward-v1.0")
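
    # Prompt-aware, reference-free scores: ImageReward and HPS v2.1 both rate
    # how well an image matches its caption; higher is better.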
    def calculate_image_reward(self, image, prompt):
        reward = self.imagereward_model.score(prompt, [image])
        return reward

    def calculate_hpsv21_score(self, image, prompt):
        result = hpsv2.score(image, prompt, hps_version="v2.1")[0]
        return result.item()
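
    # LAION-style aesthetic score: the linear head is applied to L2-normalized
    # CLIP ViT-L/14 image embeddings; no prompt is involved.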
    def calculate_aesthetic_score(self, img):
        image = self.clip_preprocess(img).unsqueeze(0)
        with torch.no_grad():
            image_features = self.clip_model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)
            prediction = self.aesthetic_model(image_features)
        return prediction.cpu().item()
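
    # torchmetrics CLIPScore: 100 * cosine similarity (clamped at 0) between
    # the CLIP image and text embeddings; higher means better alignment.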
    def calculate_clip_similarity(self, img, txt):
        img = np.array(img)
        img_tensor = torch.tensor(img).permute(2, 0, 1).to(self.device)
        score = self.clip_metric_calculator(img_tensor, txt)
        return score.cpu().item()
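
    # Masked PSNR: 20 * log10(PIXEL_MAX / sqrt(MSE)), where the squared error
    # is normalized by the mask area when a mask is given.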
    def calculate_psnr(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."
        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask
        difference = img_pred - img_gt
        difference_square = difference ** 2
        difference_square_sum = difference_square.sum()
        # fall back to the full image size when no mask is given
        difference_size = mask.sum() if mask is not None else difference.size
        mse = difference_square_sum / difference_size
        if mse < 1.0e-10:
            return 1000  # effectively identical images; cap the score
        PIXEL_MAX = 1
        return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))
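
    # LPIPS on (optionally masked) images; inputs are rescaled from [0, 1] to
    # the [-1, 1] range the metric expects. Lower means more similar.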
    def calculate_lpips(self, img_gt, img_pred, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255
        img_gt = np.array(img_gt).astype(np.float32) / 255
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."
        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask
        img_pred_tensor = torch.tensor(img_pred).permute(2, 0, 1).unsqueeze(0).to(self.device)
        img_gt_tensor = torch.tensor(img_gt).permute(2, 0, 1).unsqueeze(0).to(self.device)
        score = self.lpips_metric_calculator(img_pred_tensor * 2 - 1, img_gt_tensor * 2 - 1)
        return score.cpu().item()
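
    # MSE with the same optional-mask normalization as the PSNR above.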
    def calculate_mse(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."
        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask
        difference = img_pred - img_gt
        difference_square = difference ** 2
        difference_square_sum = difference_square.sum()
        # normalize by the mask area when given, otherwise by the full image size
        difference_size = mask.sum() if mask is not None else difference.size
        mse = difference_square_sum / difference_size
        return mse.item()
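
# Evaluation driver: choose an image subset via `gender` below, walk the
# matching directory of generated images, look up each image's caption, and
# score it with the metrics listed in evaluation_df's columns.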
device = "cuda" if torch.cuda.is_available() else "cpu"

cc3m_base_dir = "/home/kis/datasets/cc3m_attempt12"
cc3m_full_regen = "/home/kis/Downloads/imgs"
# annotations = pd.read_csv("annotations/annotations_synthetic_100percent.txt", sep='\t', header=None, dtype=str)
cc3m_annotations_full = pd.read_csv("../../../../datasets/cc3m/Train_GCC-training.tsv", sep='\t', header=0)

# evaluation
# evaluation_df = pd.DataFrame(columns=['Image ID', 'Image Reward', 'HPS V2.1', 'Aesthetic Score', 'CLIP Similarity'])
evaluation_df = pd.DataFrame(columns=['Image ID', 'Aesthetic Score'])  # add 'CLIP Similarity' etc. to score more metrics
metrics_calculator = MetricsCalculator(device)
os.makedirs("evaluation", exist_ok=True)  # output directory for the CSVs

mask_root = "../../../SemanticSegmentation/mask_skin"
prev = None
# which image subset to evaluate; this also determines the caption source
gender = "real"
root = "PATH_TO_THE_ROOT"
if gender == "man":
    img_root = root + "/foreground_syn_men/"
elif gender == "woman":
    img_root = root + "/foreground_syn_women/"
elif gender == "real":
    img_root = root + "/foreground/"
    annotations = cc3m_annotations_full
elif gender == "cvpr":
    img_root = root + "/foreground_cvpr_images/"
    annotations = pd.read_csv("/home/kis/Downloads/annotations_cvpr.csv")
elif gender == "syn":
    img_root = root + "/foreground_fully_synthetic/"
    # annotations = pd.read_csv("/home/kis/Downloads/annotations_cvpr.csv")
elif gender == "coco":
    img_root = root + "/foreground_coco_counterfactuals/"
    annotations = pd.read_json(path_or_buf=root + "/coco_counterfactuals.jsonl", lines=True)
    annotations = annotations.set_index("id")
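
# Score every image in img_root; files whose names contain "mask" or
# "original" are auxiliary outputs and are skipped.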
for fname in os.listdir(img_root):
    image_name = fname.split(".")[0]
    if "mask" in image_name or "original" in image_name:
        continue
    image_name = fname.split("_")[0]
    print(f"evaluating image {image_name} ...")
    image_id = str(image_name).zfill(9)
    if gender in ["man", "woman"]:
        caption = annotations[(annotations[0] == image_name) & (annotations[1] == gender)][2].item()
    elif gender == "real":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "cvpr":
        caption = annotations.iloc[int(image_name), 1]
    elif gender == "syn":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "coco":
        image_id = f'{fname.split("_")[0]}{fname.split("_")[1]}'
        n = int(fname.split(".")[0][-1])  # trailing 0 or 1 in the filename selects one of the two counterfactuals
        caption = annotations.loc[int(image_id)][n]

    image_path = f"{img_root}/{fname}"
    prompt = caption

    try:
        src_image_path = image_path
        src_image = Image.open(src_image_path).resize((512, 512))
        evaluation_result = [str(image_id) + "_" + str(gender)]
    except FileNotFoundError:
        continue
    success = True
    for metric in evaluation_df.columns.values.tolist()[1:]:
        print(f"evaluating metric: {metric}")
        try:
            if metric == 'Image Reward':
                metric_result = metrics_calculator.calculate_image_reward(src_image, prompt)
            elif metric == 'HPS V2.1':
                metric_result = metrics_calculator.calculate_hpsv21_score(src_image, prompt)
            elif metric == 'Aesthetic Score':
                metric_result = metrics_calculator.calculate_aesthetic_score(src_image)
            elif metric == 'CLIP Similarity':
                metric_result = metrics_calculator.calculate_clip_similarity(src_image, prompt)
            evaluation_result.append(metric_result)
        except RuntimeError:
            success = False  # skip this image if any metric fails
            break
    if success:
        evaluation_df.loc[len(evaluation_df.index)] = evaluation_result
        evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")  # checkpoint after every image
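
# Aggregate the per-image scores and persist both the raw table and the means.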
print("The averaged evaluation result:")
averaged_results=evaluation_df.mean(numeric_only=True)
print(averaged_results)
averaged_results.to_csv(f"evaluation/evaluation_result_{gender}.csv")
evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")
print(f"The generated images and evaluation results is saved in ./evaluation/")