import random

import albumentations as A
import cv2
import glob2
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import torch
from albumentations.pytorch import ToTensorV2
from pytorch_grad_cam import EigenCAM, GradCAM
from pytorch_grad_cam.utils.image import scale_cam_image, show_cam_on_image
from pytorch_grad_cam.utils.model_targets import FasterRCNNBoxScoreTarget

import config
from lightning_utils import YOLOv3Lightning
from utils import cells_to_bboxes, non_max_suppression

cmap = plt.get_cmap("tab20b")
class_labels = config.PASCAL_CLASSES
height, width = config.INFERENCE_IMAGE_SIZE, config.INFERENCE_IMAGE_SIZE
colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]

# One Material Icons glyph per PASCAL VOC class, in config.PASCAL_CLASSES order.
icons = [
    'flight', 'pedal_bike', 'flutter_dash', 'sailing', 'liquor',
    'directions_bus', 'directions_car', 'pets', 'chair', 'pets',
    'table_restaurant', 'pets', 'bedroom_baby', 'motorcycle', 'person',
    'yard', 'kebab_dining', 'chair', 'train', 'tvmonitor',
]
icons_mapping = {config.PASCAL_CLASSES[i]: icons[i] for i in range(len(icons))}

model = YOLOv3Lightning.load_from_checkpoint('YoLoV3Model2.ckpt', map_location=torch.device('cpu'))
model.eval()

# Scale the per-cell anchors by the grid size of each prediction scale.
# config.S (all three grid sizes) is needed here, not config.S[0]: a 0-dim
# tensor cannot be unsqueezed along dim 1, and each scale needs its own factor.
scaled_anchors = (
    torch.tensor(config.ANCHORS)
    * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
).to(config.DEVICE)


def get_examples():
    # Pair every example image next to this script with a random CAM transparency.
    example_images = glob2.glob('*.jpg')
    example_transparency = [random.choice([0.7, 0.8]) for _ in range(len(example_images))]
    examples = [[example_images[i], example_transparency[i]] for i in range(len(example_images))]
    return examples


def yolov3_reshape_transform(x):
    # Fold the three YOLO heads, each shaped (B, anchors, S, S, 5 + C), into a
    # single (B, 3 * anchors * (5 + C), S0, S0) activation map for EigenCAM.
    activations = []
    size = x[0].size()[2:4]
    for x_item in x:
        x_permute = x_item.permute(0, 1, 4, 2, 3)
        x_permute = x_permute.reshape(
            (x_permute.shape[0], x_permute.shape[1] * x_permute.shape[2], *x_permute.shape[3:])
        )
        activations.append(torch.nn.functional.interpolate(torch.abs(x_permute), size, mode='bilinear'))
    activations = torch.cat(activations, axis=1)
    return activations


def infer_transform(IMAGE_SIZE=config.INFERENCE_IMAGE_SIZE):
    # Letterbox to IMAGE_SIZE x IMAGE_SIZE, normalize, and convert to a CHW tensor.
    transforms = A.Compose(
        [
            A.LongestMaxSize(max_size=IMAGE_SIZE),
            A.PadIfNeeded(
                min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
            ),
            A.Normalize(
                mean=[0.45484068, 0.43406072, 0.40103856],
                std=[0.23936155, 0.23471538, 0.23876129],
                max_pixel_value=255,
            ),
            ToTensorV2(),
        ]
    )
    return transforms
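
# A small usage sketch of the preprocessing pipeline above. This helper is
# illustrative and not part of the original app; it assumes images are read
# with cv2, whose BGR output must be flipped to RGB before the transform.
def load_image_as_batch(path, image_size=config.INFERENCE_IMAGE_SIZE):
    rgb = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    return infer_transform(image_size)(image=rgb)['image'].unsqueeze(0)  # (1, 3, size, size)
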
def generate_html():
    # Start the HTML string with some style and the Material Icons stylesheet.
    # The tags below are a minimal reconstruction; only the visible text of
    # the original markup survives.
    classes = config.PASCAL_CLASSES
    html_string = """
    <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet">
    <div>
        <h1>Object Detection Prediction & Grad-Cam for YOLOv3</h1>
        <h2>Supported Classes</h2>
    """
""" # Loop through each class and add it to the HTML string with its corresponding icon for class_name in classes: icon_name = class_name.lower() # Assuming the icon name is the lowercase version of the class name icon_name = icons_mapping[icon_name] html_string += f'
{icon_name}{class_name}
' # Close the HTML string html_string += "
" return html_string def upload_image_inference(img, transparency): bboxes = [[] for _ in range(1)] nms_boxes_output, annotations = [], [] img_copy = img.copy() transform = infer_transform() img = transform(image=img)['image'].unsqueeze(0) out = model(img) for i in range(3): batch_size, A, S, _, _ = out[i].shape anchor = scaled_anchors[i] boxes_scale_i = cells_to_bboxes( out[i], anchor, S=S, is_preds=True ) for idx, (box) in enumerate(boxes_scale_i): bboxes[idx] += box for i in range(img.shape[0]): iou_thresh, thresh = 0.5, 0.6 nms_boxes = non_max_suppression( bboxes[i], iou_threshold=iou_thresh, threshold=thresh, box_format="midpoint", ) nms_boxes_output.append(nms_boxes) for box in nms_boxes_output[0]: class_prediction = int(box[0]) box = box[2:] upper_left_x = box[0] - box[2] / 2 upper_left_y = box[1] - box[3] / 2 rect = patches.Rectangle( (upper_left_x * width, upper_left_y * height), box[2] * width, box[3] * height, linewidth=2, edgecolor=colors[class_prediction], facecolor="none", ) rect = rect.get_bbox().get_points() annotations.append([rect[0].astype(int).tolist()+rect[1].astype(int).tolist(), config.PASCAL_CLASSES[class_prediction]]) new_bboxes = [a[0] for a in annotations] new_bboxes = [box for box in new_bboxes if all(val >= 0 for val in box)] objs = [b[1] for b in nms_boxes_output[0]] bbox_coord = [b[2:] for b in nms_boxes_output[0]] targets = [FasterRCNNBoxScoreTarget(objs, bbox_coord)] cam = EigenCAM(model=model, target_layers=[model.model], reshape_transform=yolov3_reshape_transform) grayscale_cam = cam(input_tensor=img, targets=targets) grayscale_cam = grayscale_cam[0, :] visualization = show_cam_on_image(img_copy/255, grayscale_cam, use_rgb=True, image_weight=transparency) renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32) for x1, y1, x2, y2 in new_bboxes: renormalized_cam[y1:y2, x1:x2] = scale_cam_image(grayscale_cam[y1:y2, x1:x2].copy()) renormalized_cam = scale_cam_image(renormalized_cam) eigencam_image_renormalized = show_cam_on_image(np.float32(img_copy)/255, renormalized_cam, use_rgb=True, image_weight=transparency) return([[img_copy, annotations], [grayscale_cam, renormalized_cam, visualization, eigencam_image_renormalized]])