leandrumartin committed
Commit a98adb9 · verified · 1 Parent(s): d55a5fe

Upload app.py with huggingface_hub
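For reference, an upload like this one can be produced with the Hub client's upload_file API. A minimal sketch (the target Space id and token handling are assumptions, not read from this commit):

from huggingface_hub import HfApi

api = HfApi()  # picks up the token from HF_TOKEN or the cached login
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id="leandrumartin/<space-name>",  # hypothetical Space id
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)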

Files changed (1)
  1. app.py +468 -0
app.py ADDED
@@ -0,0 +1,468 @@
+ import gradio as gr
+ import torch
+ from PIL import Image
+ from torchvision import transforms
+ import numpy as np
+ import random
+ import torch.nn as nn
+ from torch.utils.data import DataLoader, Dataset
+ from torchvision.models.resnet import ResNet50_Weights
+ from typing import Type, Any, Callable, Union, List, Optional
+ from torch import Tensor
+ from huggingface_hub import hf_hub_download
+
+ # Download the fine-tuned Clothing1M checkpoint from the Hugging Face Hub.
+ username = "leandrumartin"
+ model_repo = "assignment2model"
+ model_path = hf_hub_download(repo_id=f"{username}/{model_repo}", filename="clothing1m.pth")
+
+ # The 14 Clothing1M class labels, indexed in the same order as the model's outputs.
+ CATEGORY_NAMES = ['T-Shirt', 'Shirt', 'Knitwear', 'Chiffon', 'Sweater', 'Hoodie', 'Windbreaker', 'Jacket', 'Downcoat', 'Suit', 'Shawl', 'Dress', 'Vest', 'Underwear']
+
+ def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
+     """3x3 convolution with padding"""
+     return nn.Conv2d(
+         in_planes,
+         out_planes,
+         kernel_size=3,
+         stride=stride,
+         padding=dilation,
+         groups=groups,
+         bias=False,
+         dilation=dilation,
+     )
+
+
+ def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
+     """1x1 convolution"""
+     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+
+
+ class BasicBlock(nn.Module):
+     expansion: int = 1
+
+     def __init__(
+         self,
+         inplanes: int,
+         planes: int,
+         stride: int = 1,
+         downsample: Optional[nn.Module] = None,
+         groups: int = 1,
+         base_width: int = 64,
+         dilation: int = 1,
+         norm_layer: Optional[Callable[..., nn.Module]] = None,
+     ) -> None:
+         super().__init__()
+         if norm_layer is None:
+             norm_layer = nn.BatchNorm2d
+         if groups != 1 or base_width != 64:
+             raise ValueError("BasicBlock only supports groups=1 and base_width=64")
+         if dilation > 1:
+             raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+         # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+         self.conv1 = conv3x3(inplanes, planes, stride)
+         self.bn1 = norm_layer(planes)
+         self.relu = nn.ReLU(inplace=True)
+         self.conv2 = conv3x3(planes, planes)
+         self.bn2 = norm_layer(planes)
+         self.downsample = downsample
+         self.stride = stride
+
+     def forward(self, x: Tensor) -> Tensor:
+         identity = x
+
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+
+         out = self.conv2(out)
+         out = self.bn2(out)
+
+         if self.downsample is not None:
+             identity = self.downsample(x)
+
+         out += identity
+         out = self.relu(out)
+
+         return out
+
+
+ class Bottleneck(nn.Module):
+     # Bottleneck in torchvision places the stride for downsampling at the 3x3 convolution (self.conv2)
+     # while the original implementation places the stride at the first 1x1 convolution (self.conv1),
+     # per "Deep Residual Learning for Image Recognition" https://arxiv.org/abs/1512.03385.
+     # This variant is also known as ResNet V1.5 and improves accuracy according to
+     # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+
+     expansion: int = 4
+
+     def __init__(
+         self,
+         inplanes: int,
+         planes: int,
+         stride: int = 1,
+         downsample: Optional[nn.Module] = None,
+         groups: int = 1,
+         base_width: int = 64,
+         dilation: int = 1,
+         norm_layer: Optional[Callable[..., nn.Module]] = None,
+     ) -> None:
+         super().__init__()
+         if norm_layer is None:
+             norm_layer = nn.BatchNorm2d
+         width = int(planes * (base_width / 64.0)) * groups
+         # Both self.conv2 and self.downsample layers downsample the input when stride != 1
+         self.conv1 = conv1x1(inplanes, width)
+         self.bn1 = norm_layer(width)
+         self.conv2 = conv3x3(width, width, stride, groups, dilation)
+         self.bn2 = norm_layer(width)
+         self.conv3 = conv1x1(width, planes * self.expansion)
+         self.bn3 = norm_layer(planes * self.expansion)
+         self.relu = nn.ReLU(inplace=True)
+         self.downsample = downsample
+         self.stride = stride
+
+     def forward(self, x: Tensor) -> Tensor:
+         identity = x
+
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+
+         out = self.conv2(out)
+         out = self.bn2(out)
+         out = self.relu(out)
+
+         out = self.conv3(out)
+         out = self.bn3(out)
+
+         if self.downsample is not None:
+             identity = self.downsample(x)
+
+         out += identity
+         out = self.relu(out)
+
+         return out
+
+
+ class ResNet(nn.Module):
+     def __init__(
+         self,
+         block: Type[Union[BasicBlock, Bottleneck]],
+         layers: List[int],
+         num_classes: int = 1000,
+         show: bool = False,
+         zero_init_residual: bool = False,
+         groups: int = 1,
+         width_per_group: int = 64,
+         replace_stride_with_dilation: Optional[List[bool]] = None,
+         norm_layer: Optional[Callable[..., nn.Module]] = None,
+     ) -> None:
+         super().__init__()
+         if norm_layer is None:
+             norm_layer = nn.BatchNorm2d
+         self._norm_layer = norm_layer
+
+         self.show = show
+         self.inplanes = 64
+         self.dilation = 1
+         if replace_stride_with_dilation is None:
+             # each element in the tuple indicates if we should replace
+             # the 2x2 stride with a dilated convolution instead
+             replace_stride_with_dilation = [False, False, False]
+         if len(replace_stride_with_dilation) != 3:
+             raise ValueError(
+                 "replace_stride_with_dilation should be None "
+                 f"or a 3-element tuple, got {replace_stride_with_dilation}"
+             )
+         self.groups = groups
+         self.base_width = width_per_group
+         self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
+         self.bn1 = norm_layer(self.inplanes)
+         self.relu = nn.ReLU(inplace=True)
+         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+         self.layer1 = self._make_layer(block, 64, layers[0])
+         self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
+         self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
+         self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])
+         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+         self.fc = nn.Linear(512 * block.expansion, num_classes)
+         # self.fc1 = nn.Linear(512 * block.expansion, 512)
+         # self.lu = nn.LeakyReLU(0.1, inplace=True)
+         # self.fc2 = nn.Linear(512, num_classes)
+
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+             elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                 nn.init.constant_(m.weight, 1)
+                 nn.init.constant_(m.bias, 0)
+
+         # Zero-initialize the last BN in each residual branch,
+         # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+         # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+         if zero_init_residual:
+             for m in self.modules():
+                 if isinstance(m, Bottleneck) and m.bn3.weight is not None:
+                     nn.init.constant_(m.bn3.weight, 0)  # type: ignore[arg-type]
+                 elif isinstance(m, BasicBlock) and m.bn2.weight is not None:
+                     nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]
+
+     def _make_layer(
+         self,
+         block: Type[Union[BasicBlock, Bottleneck]],
+         planes: int,
+         blocks: int,
+         stride: int = 1,
+         dilate: bool = False,
+     ) -> nn.Sequential:
+         norm_layer = self._norm_layer
+         downsample = None
+         previous_dilation = self.dilation
+         if dilate:
+             self.dilation *= stride
+             stride = 1
+         if stride != 1 or self.inplanes != planes * block.expansion:
+             downsample = nn.Sequential(
+                 conv1x1(self.inplanes, planes * block.expansion, stride),
+                 norm_layer(planes * block.expansion),
+             )
+
+         layers = []
+         layers.append(
+             block(
+                 self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer
+             )
+         )
+         self.inplanes = planes * block.expansion
+         for _ in range(1, blocks):
+             layers.append(
+                 block(
+                     self.inplanes,
+                     planes,
+                     groups=self.groups,
+                     base_width=self.base_width,
+                     dilation=self.dilation,
+                     norm_layer=norm_layer,
+                 )
+             )
+
+         return nn.Sequential(*layers)
+
+     def _forward_impl(self, x: Tensor) -> Tensor:
+         # See note [TorchScript super()]
+         x = self.conv1(x)
+         x = self.bn1(x)
+         x = self.relu(x)
+         x = self.maxpool(x)
+
+         x = self.layer1(x)
+         x = self.layer2(x)
+         x = self.layer3(x)
+         x = self.layer4(x)
+
+         x = self.avgpool(x)
+         x = torch.flatten(x, 1)
+         out = self.fc(x)
+         # x = self.lu(self.fc1(x))
+         # out = self.fc2(x)
+         if self.show:
+             # When show=True the model also returns the pooled features alongside the logits.
+             return out, x
+         else:
+             return out
+
+     def forward(self, x: Tensor) -> Tensor:
+         return self._forward_impl(x)
+
+
+ def _resnet(
+     block: Type[Union[BasicBlock, Bottleneck]],
+     layers: List[int],
+     num_classes,
+     show,
+     **kwargs: Any,
+ ) -> ResNet:
+     model = ResNet(block, layers, num_classes, show, **kwargs)
+     return model
+
+
+ def resnet50(num_classes, show=False, **kwargs: Any) -> ResNet:
+     """ResNet-50 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
+
+     .. note::
+         The bottleneck of TorchVision places the stride for downsampling at the second 3x3
+         convolution while the original paper places it at the first 1x1 convolution.
+         This variant improves the accuracy and is known as `ResNet V1.5
+         <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
+
+     Args:
+         num_classes (int): number of output classes of the final fully connected layer.
+         show (bool, optional): if True, ``forward`` returns ``(logits, pooled_features)``
+             instead of the logits alone. Default is False.
+         **kwargs: parameters passed to the ``ResNet`` base class defined above. Please
+             refer to the `torchvision source code
+             <https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py>`_
+             for more details.
+     """
+     return _resnet(Bottleneck, [3, 4, 6, 3], num_classes, show, **kwargs)
+
+
+ class Clothing1M(Dataset):
+     """Single-image wrapper dataset: serves the uploaded image for inference.
+     The label is a dummy value (0) since no ground truth is available."""
+
+     def __init__(self, image, train=True, transform=None, target_transform=None, augment=False, mode='noisy'):
+         self.image = image
+         self.transform = transform
+         self.target_transform = target_transform
+         self.augment = augment
+         self.train = False  # inference only; the `train` argument is ignored
+         self.mode = mode
+
+         self.data = [self.image]
+
+     def __getitem__(self, index):
+         img, target = self.data[index], 0
+
+         # The image arrives as a numpy array from Gradio; convert it to a PIL Image.
+         img_origin = Image.fromarray(img).convert('RGB')
+
+         if self.transform is not None:
+             img = self.transform(img_origin)
+
+         if self.target_transform is not None:
+             target = self.target_transform(target)
+
+         return img, target
+
+     def __len__(self):
+         return len(self.data)
+
+ def set_seed(seed):
+     """Seed all RNGs and force deterministic cuDNN behavior for reproducibility."""
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     np.random.seed(seed)
+     random.seed(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+ def preprocess_image(image):
+     # Unused placeholder; preprocessing happens in the dataset transform below.
+     pass
+
+ def classify_image(image):
+     # Training-time configuration carried over from the original script; only a few
+     # of these fields (seed, num_workers, resume, num_classes) are used at inference.
+     args = {
+         'overwrite': False,
+         'tqdm': 0,
+         'config_file': 'configs/clothing1m.yaml',
+         'dataset': 'clothing1M',
+         'root': './data',
+         'noise_type': 'clean',
+         'noise_rate': 0.0,
+         'save_dir': None,
+         'gpus': '0',
+         'num_workers': 8,
+         'grad_bound': 0.0,
+         'seed': 233,
+         'backbone': 'res50',
+         'optimizer': 'sgd',
+         'momentum': 0.9,
+         'nesterov': False,
+         'pretrained': True,
+         'ssl_pretrained': None,
+         'resume': model_path,
+         'lr': 0.01,
+         'scheduler': 'cos',
+         'milestones': None,
+         'gamma': None,
+         'weight_decay': 0.0001,
+         'batch_size': 128,
+         'start_epoch': None,
+         'epochs': 100,
+         'warmup': 0,
+         'ema': False,
+         'beta': 1.0,
+         'num_classes': 14,
+     }
+
+     device = 'cpu'
+     set_seed(args['seed'])
+
+     # Standard ImageNet normalization statistics.
+     MEAN = (0.485, 0.456, 0.406)
+     STD = (0.229, 0.224, 0.225)
+
+     test_loader = DataLoader(
+         dataset=Clothing1M(
+             image=image,
+             train=False,
+             transform=transforms.Compose([
+                 transforms.Resize(256),
+                 transforms.CenterCrop(224),
+                 transforms.ToTensor(),
+                 transforms.Normalize(MEAN, STD)]
+             )),
+         batch_size=256,
+         shuffle=False,
+         pin_memory=True,
+         num_workers=args['num_workers'])
+
+     model = resnet50(num_classes=args['num_classes'], show=True)
+     nFeat = 2048  # feature dimension of ResNet-50 (unused here)
+
+     # Initialize the backbone with ImageNet weights, dropping the 1000-way classifier head...
+     state_dict = ResNet50_Weights.IMAGENET1K_V2.get_state_dict(progress=True)
+     state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
+     missing, unexpected = model.load_state_dict(state_dict, strict=False)
+     print('Loading ImageNet pretrained model')
+     print('Model missing keys:\n', missing)
+     print('Model unexpected keys:\n', unexpected)
+
+     # ...then overwrite with the fine-tuned checkpoint. The checkpoint stores the EMA
+     # copy of the weights under an 'ema_model.' prefix, which is stripped here.
+     checkpoint = torch.load(args['resume'], map_location=torch.device(device))
+     state_dict = checkpoint['model_state_dict']
+     for key in list(state_dict.keys()):
+         if 'ema_model' in key:
+             state_dict[key.replace('ema_model.', '')] = state_dict[key]
+             del state_dict[key]
+         else:
+             del state_dict[key]
+     model.load_state_dict(state_dict)
+     epoch = checkpoint['epoch']
+     if args['start_epoch'] is None:
+         args['start_epoch'] = epoch + 1
+
+     model = model.to(device)
+     model.eval()  # use running BatchNorm statistics for single-image inference
+
+     # The loader yields a single batch containing the one uploaded image.
+     loader_x, loader_y = None, None
+     for x, y in test_loader:
+         loader_x, loader_y = x.to(device), y.to(device)
+         break
+     with torch.no_grad():
+         z, _ = model(loader_x)
+     pred = torch.argmax(z, 1)
+     prediction_label = CATEGORY_NAMES[pred.item()]
+     return f'Predicted label: {prediction_label}'
+
+ # Example image queries (optional but recommended for demonstration).
+ # These files must exist in the Space repository.
+ example_image = "./examples/image_0.jpg"
+ example_image_2 = "./examples/image_7.jpg"
+
+ # Create the Gradio interface
+ interface = gr.Interface(
+     fn=classify_image,
+     inputs=gr.Image(),
+     outputs=gr.Text(),
+     examples=[example_image, example_image_2],
+ )
+
+ if __name__ == "__main__":
+     interface.launch()
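For a quick check outside the web UI, something like the following could be run from the Space's root directory (a sketch, not part of the commit; it assumes the example image exists locally, and the printed label is illustrative):

import numpy as np
from PIL import Image
from app import classify_image  # importing app also downloads the checkpoint

# classify_image expects a numpy RGB array, as Gradio's Image input provides.
img = np.array(Image.open("./examples/image_0.jpg").convert("RGB"))
print(classify_image(img))  # e.g. "Predicted label: T-Shirt"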