Spaces:

quandn2003
/

LoGoSAM_demo

Sleeping

File size: 5,853 Bytes

427d150

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import torch
from torch.nn import functional as F
from torchvision.transforms.functional import resize, to_pil_image  # type: ignore
from typing import List

from copy import deepcopy
from typing import Tuple


class ResizeLongestSide:
    """

    Resizes images to the longest side 'target_length', as well as provides

    methods for resizing coordinates and boxes. Provides methods for

    transforming both numpy array and batched torch tensors.

    """

    def __init__(self, target_length: int,

                pixel_mean: List[float] = [123.675, 116.28, 103.53],

                pixel_std: List[float] = [58.395, 57.12, 57.375],) -> None:
        
        self.target_length = target_length
        self.pixel_mean = torch.Tensor(pixel_mean).view(-1, 1, 1)
        self.pixel_std = torch.Tensor(pixel_std).view(-1, 1, 1)


    def apply_image(self, image: np.ndarray) -> np.ndarray:
        """

        Expects a numpy array with shape HxWxC in uint8 format.

        """
        target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length)
        return np.array(resize(to_pil_image(image), target_size))

    def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
        """

        Expects a numpy array of length 2 in the final dimension. Requires the

        original image size in (H, W) format.

        """
        old_h, old_w = original_size
        new_h, new_w = self.get_preprocess_shape(
            original_size[0], original_size[1], self.target_length
        )
        coords = deepcopy(coords).astype(float)
        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)
        return coords

    def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ...]) -> np.ndarray:
        """

        Expects a numpy array shape Bx4. Requires the original image size

        in (H, W) format.

        """
        boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size)
        return boxes.reshape(-1, 4)

    def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor:
        """

        Expects batched images with shape BxCxHxW and float format. This

        transformation may not exactly match apply_image. apply_image is

        the transformation expected by the model.

        """
        # Expects an image in BCHW format. May not exactly match apply_image.
        target_size = self.get_preprocess_shape(image.shape[-2], image.shape[-1], self.target_length)
        if len(image.shape) == 3:
            image = image.unsqueeze(0)
            image = F.interpolate(
                                    image, target_size,
                                    mode="bilinear", 
                                    align_corners=False, 
                                    antialias=True
                                    )
            return image.squeeze(0)
        elif len(image.shape) == 2:
            image = image.unsqueeze(0).unsqueeze(0)
            image = F.interpolate(
                                    image, target_size,
                                    mode="bilinear", 
                                    align_corners=False, 
                                    antialias=True
                                    )
            return image.squeeze(0).squeeze(0)
        
        else:
            return F.interpolate(
                image, target_size, mode="bilinear", align_corners=False, antialias=True
            )
    
    def preprocess(self, x: torch.Tensor) -> torch.Tensor:
        """Normalize pixel values and pad to a square input."""
        # Normalize colors
        if len(x.shape)==2:
            pass
        else:
            device = x.device
            x = (x - self.pixel_mean.to(device)) / self.pixel_std.to(device) # TODO uncomment this
            # x = x / 255
            pass

        # Pad
        h, w = x.shape[-2:]
        padh = self.target_length - h
        padw = self.target_length - w
        x = F.pad(x, (0, padw, 0, padh))
        return x
    

    def apply_coords_torch(

        self, coords: torch.Tensor, original_size: Tuple[int, ...]

    ) -> torch.Tensor:
        """

        Expects a torch tensor with length 2 in the last dimension. Requires the

        original image size in (H, W) format.

        """
        old_h, old_w = original_size
        new_h, new_w = self.get_preprocess_shape(
            original_size[0], original_size[1], self.target_length
        )
        coords = deepcopy(coords).to(torch.float)
        coords[..., 0] = coords[..., 0] * (new_w / old_w)
        coords[..., 1] = coords[..., 1] * (new_h / old_h)
        return coords

    def apply_boxes_torch(

        self, boxes: torch.Tensor, original_size: Tuple[int, ...]

    ) -> torch.Tensor:
        """

        Expects a torch tensor with shape Bx4. Requires the original image

        size in (H, W) format.

        """
        boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size)
        return boxes.reshape(-1, 4)

    @staticmethod
    def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]:
        """

        Compute the output size given input size and target long side length.

        """
        scale = long_side_length * 1.0 / max(oldh, oldw)
        newh, neww = oldh * scale, oldw * scale
        neww = int(neww + 0.5)
        newh = int(newh + 0.5)
        return (newh, neww)