Spaces:

JinhuaL1ANG
/

AudioMorphix

Running on Zero

File size: 1,246 Bytes

9a6dac6

import torch
from specvqgan.modules.losses.vggishish.transforms import Crop


class FromMinusOneOneToZeroOne:
    """Apply the affine map ``v -> (v + 1) / 2`` to ``item["image"]``.

    The name promises a [-1, 1] --> [0, 1] rescaling, and that is what
    happens when the input really lies in [-1, 1]. Reconstructed specs are
    not guaranteed to stay inside that range, so the output is not
    guaranteed to lie in [0, 1] either.
    """

    def __call__(self, item):
        """Rescale ``item["image"]`` in place and return the same ``item``."""
        shifted = item["image"] + 1
        item["image"] = shifted / 2
        return item


class CropNoDict(Crop):
    """``Crop`` variant whose call takes a bare tensor rather than an item dict.

    NOTE(review): relies on ``self.preprocessor`` being set up by
    ``Crop.__init__`` -- that code is not visible in this file.
    """

    def __init__(self, cropped_shape, random_crop=None):
        """Forward ``cropped_shape`` and ``random_crop`` to ``Crop`` as keywords."""
        super().__init__(cropped_shape=cropped_shape, random_crop=random_crop)

    def __call__(self, x):
        """Crop the tensor ``x`` and return the result as a new ``torch.Tensor``."""
        # albumentations expects an ndarray laid out as (H, W, ...), while we
        # hold a tensor laid out as (B, H, W). Treat the batch dim (B) as our
        # "channel" dim by moving it to the end before the numpy conversion.
        channels_last = x.permute(1, 2, 0).numpy()
        cropped = self.preprocessor(image=channels_last)["image"]
        # Move the batch dim back to the front, then return to torch.
        batch_first = cropped.transpose(2, 0, 1)
        return torch.from_numpy(batch_first)


class GetInputFromBatchByKey:
    """Callable that pulls one entry (e.g. the image) out of an item dict."""

    def __init__(self, input_key):
        """Remember which key to look up on every call."""
        self.input_key = input_key

    def __call__(self, item):
        """Return ``item[self.input_key]``; a missing key raises as usual."""
        selected = item[self.input_key]
        return selected


class ToFloat32:
    """Callable transform that converts its input via the input's ``.float()``."""

    def __call__(self, item):
        """Return ``item.float()``; ``item`` must provide a ``float`` method."""
        as_float = item.float()
        return as_float